library("SummarizedExperiment")
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## Loading required package: BiocParallel
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
library("readODS")
library("scatterD3")
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
pcaAnalysis <- function(sExpt, iqr.cutoff=0.1, category.selected = c("standard", "test"),
                        plot=c("2d", "3d")) {
  ###########################################################################
  ## PART I. Filter samples according to phenoData
  ###########################################################################
  ## o phenoData table contains which samples should be used in the analysis
  ##    o the samples which should be used in the analysis will have an
  ##     assigned category sExpt@phenoData$category, as "standard" or "test"
  ##    o non-assigned samples with NA values will be ignored
  pdata <- colData(sExpt)
  ## filter out samples with missing category and/or general cell type
  pdata.sel <- .filterPheno(pdata, fun.main, "na")
  
  ############################################################################
  ## PART II. Filter genes in dataset
  ############################################################################
  ## A. Keep only the samples in the selected category (either 'standard' or 'test')
  sel <- pdata.sel$category %in% category.selected
  if (sum(sel) == 0)
    stop(paste("No sample in the selected category was found, exiting function",
               fun.main))
  pdata.sel <- pdata.sel[sel, ]
  ynorm <- assay(sExpt[, sel], "counts")
  
  ## B. Filter out not variable probes,
  ##    o variance in terms of IQR of median expressions
  ##    o want to get rid of probes that are below a given IQR threshold
  
  ## Calculate IQR of the samples and filter by the given IQR threshold
  medIQR <- apply(ynorm, 1, IQR)
  selGenes <- medIQR >= quantile(medIQR, probs=1 - iqr.cutoff)
  if (sum(selGenes) == 0) {
    ## this should almost never happen
    stop(paste("No gene passed the IQR-based filtering, exiting function",
               fun.main))
  }
  
  ## We keep for score calculation the gene-filtered dataset with standards
  ## and test samples
  ynormIQR <- assay(sExpt[selGenes, rownames(pdata.sel)], "counts")
  
  ############################################################################
  ## PART III. PCA Analysis of the filtered data
  ############################################################################
  
  ## Remove any rows that have zero variance
  sel <- apply(ynormIQR, 1, function(x) sd(x) != 0)
  ynormIQR <- ynormIQR[sel,]
  
  pca <- prcomp(t(ynormIQR), scale=TRUE)
  
  ## Get proportion of variance explained by PC1 and PC2
  pca.sum <- summary(pca)
  pc1 <- pca.sum$importance[2,1] * 100
  pc2 <- pca.sum$importance[2,2] * 100
  pc3 <- pca.sum$importance[2,3] * 100
  
  print(pc1)
  print(pc2)
  print(pc3)
    
  ## Extract coordinates from pca object
  pca.comp <- as.data.frame(pca$x[, c(1, 2, 3)])
  pca.comp <- cbind(pca.comp,
    cell_type = pdata.sel[, "cell_type"],
    sample_name = rownames(pca.comp))

  if (plot == "2d") {
    ## 2D plot of the first and second principal components
    tooltips <- paste(" <strong>", pca.comp$sample_name,"</strong><br />", pca.comp$cell_type)
    scatterD3(data = pca.comp, x=PC1, y=PC2, tooltip_text = tooltips,
            col_var=cell_type)
  } else {
    ## 3D plot
    fig <- plot_ly(pca.comp, x = ~PC1, y = ~PC2, z = ~PC3, color = ~cell_type,
                   text = ~sample_name)
    fig <- fig %>% add_markers()
    fig <- fig %>% layout(scene = list(xaxis = list(title = paste('PC1(', pc1, '%)')),
                                     yaxis = list(title = paste('PC2(', pc2, '%)')),
                                     zaxis = list(title = paste('PC3(', pc3, '%)'))), 
                          title = paste('3D plot of PCA analysis result (', category.selected,
                                        ' cells, IQR cutoff: ', iqr.cutoff))
  
    fig
  }
}

################################################################################
## FUNCTION: .filterPheno
################################################################################
#' From utils.R in CellScore
#' Filters samples from phenotype data with missing info
#' @param pheno a DataFrame with phenotype descriptions
#' @param calling.fun a character - name of the parent function calling this
#'   function
#' @param flag a character - type of filter, one of the following: 'na',
#'   'group', subgroup', anygroup' (the last option checks for any match in
#'   group or subgroup)
#' @param flag.value a character - cell name needed for the 'specifc' filter
#' @return filtered data.frame
#' @keywords filter
.filterPheno <- function(pheno, calling.fun,
                         flag = c("na", "group", "subgroup", "anygroup"),
                         flag.value=NULL){
    ## If filtering by (sub)groups, the flag.value must be specified
    stopifnot(flag == "na" || !is.null(flag.value))

    sel <- switch(flag,
                  na = !is.na(pheno$category) & !is.na(pheno$general_cell_type),
                  group = pheno$general_cell_type %in% flag.value,
                  subgroup = pheno$sub_cell_type1 %in% flag.value,
                  anygroup = pheno$general_cell_type %in% flag.value |
                      pheno$sub_cell_type1 %in% flag.value)

    if (sum(sel) == 0){
        print(sel)
        stop(paste("No samples left after phenotype filtering, exiting function",
                   calling.fun))
    }

    pheno[sel, ]
}

# read the file selectedDEE2SExpr.rds that stores SummarizedExperiment object
# of the count data.
selectedDEE2SExpr <- readRDS("../rdsFiles/selectedDEE2SExpr.rds")

# PCA analysis of standard samples
pcaAnalysis(selectedDEE2SExpr, 0.1, "standard", "3d")
## [1] 56.654
## [1] 11.964
## [1] 6.282
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
pcaAnalysis(selectedDEE2SExpr, 0.1, "standard", "2d")
## [1] 56.654
## [1] 11.964
## [1] 6.282
pcaAnalysis(selectedDEE2SExpr, 0.2, "standard", "3d")
## [1] 51.991
## [1] 11.536
## [1] 7.38
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
pcaAnalysis(selectedDEE2SExpr, 0.2, "standard", "2d")
## [1] 51.991
## [1] 11.536
## [1] 7.38
pcaAnalysis(selectedDEE2SExpr, 0.3, "standard", "3d")
## [1] 43.101
## [1] 11.322
## [1] 8.146
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
pcaAnalysis(selectedDEE2SExpr, 0.3, "standard", "2d")
## [1] 43.101
## [1] 11.322
## [1] 8.146
# PCA analysis of test samples
pcaAnalysis(selectedDEE2SExpr, 0.1, "test", "3d")
## [1] 61.441
## [1] 8.532
## [1] 5.522
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
pcaAnalysis(selectedDEE2SExpr, 0.1, "test", "2d")
## [1] 61.441
## [1] 8.532
## [1] 5.522
pcaAnalysis(selectedDEE2SExpr, 0.2, "test", "3d")
## [1] 54.79
## [1] 8.924
## [1] 6.069
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
pcaAnalysis(selectedDEE2SExpr, 0.2, "test", "2d")
## [1] 54.79
## [1] 8.924
## [1] 6.069
pcaAnalysis(selectedDEE2SExpr, 0.3, "test", "3d")
## [1] 45.325
## [1] 8.71
## [1] 7.032
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
pcaAnalysis(selectedDEE2SExpr, 0.3, "test", "2d")
## [1] 45.325
## [1] 8.71
## [1] 7.032